# SQLite database files read by this script, one per export.
# NOTE(review): the leading digits look like YYYYMM ranges -- confirm.
db_1 <- "202001_202212_02_mx_youtube_data.db"
db_2 <- "202301_202312_02_mx_youtube_data.db"
db_3 <- "202401_202405_02_mx_youtube_data.db"
db_4 <- "202405_02_mx_youtube_data.db"

library(DBI)
library(RSQLite)
library(lubridate)

# Combined comments from every database; assigned once after the loop below.
comments_2020_24 <-
  data.frame()

# NOTE(review): never filled in the visible part of this script -- kept in
# case later code relies on it.
videos_2020_24 <-
  data.frame()

db_files <- c(db_1, db_2, db_3, db_4)

# Collect one data frame per database and bind them a single time afterwards;
# calling rbind() inside the loop re-copies the accumulated rows on every pass.
comments_by_db <- vector("list", length(db_files))

for (i in seq_along(db_files)) {

  db <- db_files[[i]]
  print(db)

  # With RSQLite's default flags, connecting to a missing path creates a new
  # empty database, which would then fail with a misleading "no such table"
  # error and leave a stray file behind. Fail early instead.
  if (!file.exists(db)) {
    stop("Database file not found: ", db, call. = FALSE)
  }

  # Connect to database
  con <- dbConnect(RSQLite::SQLite(), db)

  # Read comments; the connection is closed even when the query errors.
  comments <- tryCatch(
    dbGetQuery(con,
               "SELECT comment_id, text_original, published_at FROM comments"),
    finally = dbDisconnect(con)
  )

  comments_by_db[[i]] <- comments

}

# Stack the per-database results in the order the files were read.
comments_2020_24 <- do.call(rbind, comments_by_db)

# Parse the published_at strings into date-times, reading them as UTC.
comments_2020_24[["datetime"]] <- ymd_hms(
  comments_2020_24[["published_at"]],
  tz = "UTC"
)

# Same instants, displayed on the America/Mexico_City clock.
comments_2020_24[["datetime_mx"]] <- with_tz(
  comments_2020_24[["datetime"]],
  tzone = "America/Mexico_City"
)

# Plain vector of comment texts, used as the input for word counting.
text_original <- comments_2020_24[["text_original"]]

# Count the whitespace-separated words in `x`.
#
# x: a character vector (normally a single string). When several strings are
#    given, the words of all of them are counted together.
# Returns a single integer. Empty strings, whitespace-only strings and NA
# values contribute zero words.
wc <- function(x) {
  tokens <- unlist(strsplit(x, "\\s+"), use.names = FALSE)
  # strsplit() emits an empty first token when the text starts with
  # whitespace, and passes NA through unchanged; neither is a word.
  sum(!is.na(tokens) & nzchar(tokens))
}

library(parallel)

# Number of workers: the "cl.cores" option if set, otherwise every detected
# core. detectCores() can return NA when the count is unknown, so fall back to
# a single worker in that case.
mc <- getOption("cl.cores", detectCores())
if (is.na(mc) || mc < 1L) {
  mc <- 1L
}
cl <- makeCluster(mc)

# Count words for each comment on the cluster. Only `wc` has to be exported:
# parLapply() already ships the pieces of `text_original` to the workers, so
# exporting the whole vector to each of them would only duplicate it.
# The cluster is shut down even if the computation fails.
output <- tryCatch(
  {
    clusterExport(cl = cl, varlist = "wc", envir = environment())
    parLapply(cl, text_original, function(x) {
      wc(x)
    })
  },
  finally = stopCluster(cl)
)

# parLapply() returns a list; flatten it so `wc` is an ordinary integer column
# (a list column cannot be written by write.csv()).
comments_2020_24$wc <- as.integer(unlist(output, use.names = FALSE))

# Save the combined comments table to CSV without a row-name column.
# FALSE is spelled out because the alias `F` is an ordinary variable that can
# be reassigned.
write.csv(
  comments_2020_24,
  file = "comments_2020_24_wt_wc.csv",
  row.names = FALSE
)
  